library(haven)
library(dplyr)
library(tidyr)
library(countrycode)
library(ggplot2)
library(lme4)
library(effects)
library(texreg)
library(estimatr)
library(grid)
library(gridExtra)
library(fmsb)
library(sandwich)
library(clubSandwich)
library(lmtest)
library(ggeffects)
library(lubridate)
library(testthat)
options(scipen = 999)

rm(list = ls())

path <- "/Users/eborbath/Library/CloudStorage/Dropbox-WZB/Endre Borbath/Papers/Unions/Replication_material/" # you need to correct it here

getmode <- function(v) {
    uniqv <- unique(v)
    uniqv[which.max(tabulate(match(v, uniqv)))]
}

find_single_mode <- function(x) {
  tbl <- table(x)
  mode_level <- names(tbl[tbl == max(tbl)])[1]
  return(mode_level)
}

facmin <- function(n) {
    min(as.numeric(levels(n)))
}

facmax <- function(x) {
    max(as.numeric(levels(x)))
}

range01 <- function(x) {
    (x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
}

pea_dat <- read_dta(paste0(path, "reg_vars.dta")) %>%
    mutate(
        unions = as.numeric(unions),
        econ_private = as.factor(econ_private),
        econ_public = as.factor(econ_public),
        political = as.factor(political),
        cultural = as.factor(cultural),
        demo = as.factor(demo),
        strike = as.factor(strike),
        socialgroups_occup = as.factor(socialgroups_occup),
        other_SG = as.factor(other_SG),
        parties = as.factor(parties),
        part_num = as.numeric(part_num),
        region_Visser = as.factor(region_Visser),
        crisis = as.factor(crisis),
        unemployment_q = as.numeric(unemployment_q),
        cab_leftright = as.numeric(cab_leftright),
        Union_Density = as.numeric(Union_Density),
        Level_new = as.factor(Level_new),
        Tripartite_Council = as.factor(Tripartite_Council),
        RI = as.factor(RI),
        Eff_Number_Unions = as.numeric(Eff_Number_Unions),
        iso2code = as.factor(iso2code),
        str_label = as.factor(str_label)
    ) %>%
    mutate(Union_Density_sq = Union_Density^2) %>%
    mutate_if(is.numeric, funs(range01(.))) %>% 
  mutate(country=substring(as.character(as.factor(str_label)), 1, 2),
         date_q=floor_date(edate, unit="quarter"))
  

############################
### crisis exposure data ###
############################

# loading the datasets

cases_deaths <- read.csv(paste0(path, "cases_deaths.csv")) 
head(cases_deaths)

cases_deaths <- cases_deaths %>% 
  rename(covid_cases_q=Daily.new.confirmed.cases.of.COVID.19,
         covid_deaths_q=Daily.new.confirmed.deaths.due.to.COVID.19) %>% 
  mutate(country=countrycode(as.character(Entity), "country.name", "iso2c")) %>% 
  filter(!is.na(country)) %>%
  select(-Code) %>% 
  mutate(date=ymd(Day)) %>% 
  mutate(date_q = floor_date(date, unit="quarter")) %>% 
  group_by(country, date_q) %>%
  mutate_at(vars(covid_cases_q, covid_deaths_q), ~mean(., na.rm=TRUE)) %>% 
  select(country, date_q, covid_cases_q, covid_deaths_q) %>% 
  distinct() %>% 
  mutate(covid_countries = 1) %>% 
  mutate(country=ifelse(country=="GB", "UK", country))
  
table(cases_deaths$date_q)


lock <- read.csv(paste0(path, "LOCK.csv"))
head(lock)

lock <- lock %>% 
  rename(stringency_index=stringency_index) %>% 
  mutate(country=countrycode(as.character(Entity), "country.name", "iso2c")) %>% 
  filter(!is.na(country)) %>%
  select(-Code) %>% 
  mutate(date=ymd(Day)) %>% 
  mutate(date_q = floor_date(date, unit="quarter")) %>% 
  group_by(country, date_q) %>%
  mutate_at(vars(stringency_index), ~mean(., na.rm=TRUE)) %>% 
  select(country, date_q, stringency_index) %>% 
  distinct() %>% 
  mutate(lock_countries = 1) %>% 
  mutate(country=ifelse(country=="GB", "UK", country))

refugee_arrivals <- readxl::read_xlsx(paste0(path, "migr_asyappctzm_page_spreadsheet.xlsx"),
                                      sheet="Sheet 1",
                                      range="A12:GL47") %>% 
  mutate_at(vars(2:194), ~ifelse(.==":", as.numeric(NA), as.numeric(.))) %>%
  pivot_longer(2:194, names_to = "date", values_to = "refugee_arrivals") %>% 
  mutate(country=countrycode(as.character(TIME), "country.name", "iso2c")) %>% 
  filter(!is.na(country)) %>%
  select(-TIME) %>% 
  mutate(year=as.numeric(substr(date, 1, 4))) %>%
  mutate(month=as.numeric(substr(date, 6, 7))) %>% 
  select(-date)

pop <- gt::countrypops %>% 
  mutate(country=countrycode(as.character(country_name), "country.name", "iso2c")) %>% 
  select(country, year, population)

refugee_arrivals <- left_join(refugee_arrivals, pop, by=c("country", "year")) %>% 
  arrange(country, year) %>% 
  filter(year<=2021) %>% 
  filter(year>=2000) %>% 
  mutate(refugee_arrivals=refugee_arrivals*100/population) %>%
  mutate(date_q=floor_date(ymd(paste(year, month, "01", sep="-")), unit="quarter")) %>%
  group_by(country, date_q) %>%
  mutate(refugee_arrivals_q=mean(refugee_arrivals, na.rm=TRUE)) %>%
  ungroup() %>% 
  select(country, date_q, refugee_arrivals_q) %>% 
  distinct() %>%
  mutate(refugee_countries = 1) %>% 
  na.omit()

google_trends <- read.csv(paste0(path, "integrated_trends.csv")) %>% 
  select(-X)
head(google_trends)

google_trends <- google_trends %>% 
  rename(country=Country) %>% 
  filter(!is.na(country)) %>%
  mutate(date=ymd(paste(year, month, "01", sep="-"))) %>% 
  mutate(date_q = floor_date(date, unit="quarter")) %>% 
  group_by(country, date_q) %>%
  mutate_at(vars(comp_refugee, comp_covid, comp_euro), ~mean(., na.rm=TRUE)) %>% 
  select(country, date_q, comp_refugee, comp_covid, comp_euro) %>% 
  distinct() %>% 
  mutate_at(vars(comp_refugee, comp_covid, comp_euro), ~ifelse(is.na(.), 0, .)) %>% 
  mutate(gtrends_countries = 1) %>% 
  mutate(country=ifelse(country=="GB", "UK", country))


crisis_dat <- full_join(cases_deaths, lock, by=c("country", "date_q")) %>% 
  full_join(refugee_arrivals, by=c("country", "date_q")) %>% 
  full_join(google_trends, by=c("country", "date_q")) %>% 
  distinct()

rm(cases_deaths, lock, refugee_arrivals, google_trends)

# Check if "country" and "date_q" together form unique keys
test_that("Check uniqueness of country and date_q in crisis_dat", {
  duplicated_rows <- crisis_dat[duplicated(crisis_dat[c("country", "date_q")]), ]
  
  # Ensure there are no duplicated rows
  expect_equal(nrow(duplicated_rows), 0, 
               info = "There should be no duplicated rows based on 'country' and 'date_q'.")
  rm(duplicated_rows)
})

pea_dat <- left_join(pea_dat, crisis_dat, by=c("country", "date_q")) %>% 
  group_by(iso2code) %>% 
  mutate_at(vars(contains("_countries")), ~mean(., na.rm=TRUE)) %>%
  ungroup(.) %>% 
  mutate_at(vars(covid_deaths_q, covid_cases_q), 
            ~ifelse(!is.na(covid_countries) & is.na(.), 0, .)) %>% 
  mutate_at(vars(refugee_arrivals_q), 
            ~ifelse(!is.na(refugee_countries) & is.na(.), 0, .)) %>% 
  mutate_at(vars(stringency_index), 
            ~ifelse(!is.na(lock_countries) & is.na(.), 0, .)) %>% 
  mutate_at(vars(contains("comp_")), 
            ~ifelse(!is.na(gtrends_countries) & is.na(.), 0, .)) %>% 
  mutate_at(vars(covid_deaths_q, covid_cases_q, refugee_arrivals_q,
                 stringency_index, contains("comp_")), ~range01(.))
  

rm(crisis_dat)


# Table 4

M1 <- glmer(unions ~ econ_private + econ_public + political + cultural + demo + strike + socialgroups_occup + other_SG + parties +
    part_num + region_Visser + crisis + unemployment_q + cab_leftright +
    1 + (1 | iso2code) + (1 | str_label), data = pea_dat, family = binomial(link = "logit"), nAGQ = 0L)

summary(M1)

M2 <- glmer(private_public ~ socialgroups_occup + other_SG + parties + part_num + crisis +
    region_Visser + cab_leftright + unemployment_q +
    1 + (1 | iso2code) + (1 | str_label), data = pea_dat, family = binomial(link = "logit"), nAGQ = 0L)

summary(M2)

htmlreg(
    l = list(M1, M2), doctype = FALSE, center = TRUE, siunitx = TRUE,
    custom.model.names = c("Model 1 Union Sponsorship of Protest (=1)", "Model 2 Movement Character of Union Protest (=1)"),
    single.row = TRUE, no.margin = TRUE,
    caption = "",
    include.variance = FALSE,
    include.aic = FALSE,
    include.bic = FALSE,
    omit.coef = "(Intercept)",
    custom.coef.map = list(
        region_Visser2 = "Social-partnership (West)",
        region_Visser3 = "State-centred (South)",
        region_Visser4 = "Liberal",
        region_Visser5 = "CEE regimes",
        econ_private1 = "Private economic (0,1)",
        econ_public1 = "Public economic (0,1)",
        political1 = "Political (0,1)",
        cultural1 = "Cultural (0,1)",
        demo1 = "Demonstration (0,1)",
        strike1 = "Strike (0,1)",
        socialgroups_occup1 = "Occupational social group (0,1)",
        other_SG1 = "Other social group (0,1)",
        parties1 = "Parties (0,1)",
        part_num = "Participation rate",
        crisis1 = "Shock period",
        crisis2 = "Euro crisis",
        crisis3 = "Post-crises years (after 2015)",
        cab_leftright = "Cabinet left-right",
        unemployment_q = "Unemployment (quarterly)"
        # gdp_per_capita = "GDP per capita"
    ),
    groups = list(
        "Region (Ref: Corporatist: North)" = 1:4,
        "Protest Issues" = 5:8,
        "Action Forms" = 9:10,
        "Protest Actors" = 11:13,
        "Crisis (Ref: Normal Times)" = 15:17,
        "Political & economic context" = 18:19
    ),
    include.deviance = FALSE,
    include.loglik = FALSE,
    file = paste0(path, "Figures/table4.html"),
)


# marginal effects from model 1


newdat <- expand.grid(
    unions = getmode(pea_dat$unions),
    econ_private = getmode(pea_dat$econ_private),
    econ_public = getmode(pea_dat$econ_public),
    political = getmode(pea_dat$political),
    cultural = getmode(pea_dat$cultural),
    demo = getmode(pea_dat$demo),
    strike = getmode(pea_dat$strike),
    socialgroups_occup = getmode(pea_dat$socialgroups_occup),
    other_SG = getmode(pea_dat$other_SG),
    parties = getmode(pea_dat$parties),
    part_num = mean(pea_dat$part_num, na.rm = TRUE),
    crisis = getmode(pea_dat$crisis),
    region_Visser = as.factor(seq(facmin(pea_dat$region_Visser), facmax(pea_dat$region_Visser), 1)),
    cab_leftright = mean(pea_dat$cab_leftright, na.rm = TRUE),
    # gdp_per_capita = mean(pea_dat$gdp_per_capita, na.rm = TRUE),
    unemployment_q = mean(pea_dat$unemployment_q, na.rm = TRUE)
)

mm <- model.matrix(terms(M1), newdat)
newdat$private_public <- predict(M1, newdat, re.form = NA)
pvar1 <- diag(mm %*% tcrossprod(vcov(M1), mm))
cmult <- qnorm(0.975)

## lower and upper CI
newdat <- data.frame(
    newdat,
    plo = newdat$unions - cmult * sqrt(pvar1),
    phi = newdat$unions + cmult * sqrt(pvar1)
)

newdat <- newdat %>%
    mutate(labels = case_when(
        region_Visser == 1 ~ "Corporatism (North)",
        region_Visser == 2 ~ "Social-partnership (West)",
        region_Visser == 3 ~ "State-centered (South)",
        region_Visser == 4 ~ "Liberal",
        region_Visser == 5 ~ "CEE regimes"
    )) %>%
    mutate(labels = factor(labels, levels = c(
        "CEE regimes",
        "Liberal",
        "State-centered (South)",
        "Social-partnership (West)",
        "Corporatism (North)"
    )))

ggplot(data = newdat, aes(
    x = plogis(unions), y = as_factor(`labels`),
    xmin = plogis(plo), xmax = plogis(phi)
)) +
    geom_point() +
    geom_pointrange() +
    xlim(0, 1) +
    ylab("") +
    xlab("Predicted Probabilities") +
    theme_bw() +
    theme(
        legend.title = element_blank(),
        legend.position = "bottom"
    )

ggsave(
    plot = last_plot(),
    path = paste0(path, "/Appendix/"),
    filename = "model1_margins.png",
    scale = 0.75,
    dpi = 400,
    width = 7,
    height = 6
)

# Figure 5

newdat <- expand.grid(
    private_public = getmode(subset(pea_dat, !is.na(private_public))$private_public),
    socialgroups_occup = getmode(subset(pea_dat, !is.na(private_public))$socialgroups_occup),
    other_SG = getmode(subset(pea_dat, !is.na(private_public))$other_SG),
    parties = getmode(subset(pea_dat, !is.na(private_public))$parties),
    part_num = mean(subset(pea_dat, !is.na(private_public))$part_num, na.rm = TRUE),
    crisis = getmode(subset(pea_dat, !is.na(private_public))$crisis),
    region_Visser = as.factor(seq(
        facmin(subset(pea_dat, !is.na(private_public))$region_Visser),
        facmax(subset(pea_dat, !is.na(private_public))$region_Visser), 1
    )),
    cab_leftright = mean(subset(pea_dat, !is.na(private_public))$cab_leftright, na.rm = TRUE),
    # gdp_per_capita = mean(subset(pea_dat, !is.na(private_public))$gdp_per_capita, na.rm = TRUE),
    unemployment_q = mean(subset(pea_dat, !is.na(private_public))$unemployment_q, na.rm = TRUE)
)

mm <- model.matrix(terms(M2), newdat)
newdat$private_public <- predict(M2, newdat, re.form = NA)
pvar1 <- diag(mm %*% tcrossprod(vcov(M2), mm))
cmult <- qnorm(0.975)

## lower and upper CI
newdat <- data.frame(
    newdat,
    plo = newdat$private_public - cmult * sqrt(pvar1),
    phi = newdat$private_public + cmult * sqrt(pvar1)
)

newdat <- newdat %>%
    mutate(labels = case_when(
        region_Visser == 1 ~ "Corporatism (North)",
        region_Visser == 2 ~ "Social-partnership (West)",
        region_Visser == 3 ~ "State-centered (South)",
        region_Visser == 4 ~ "Liberal",
        region_Visser == 5 ~ "CEE regimes"
    )) %>%
    mutate(labels = factor(labels, levels = c(
        "CEE regimes",
        "Liberal",
        "State-centered (South)",
        "Social-partnership (West)",
        "Corporatism (North)"
    )))

ggplot(data = newdat, aes(
    x = plogis(private_public), y = as_factor(`labels`),
    xmin = plogis(plo), xmax = plogis(phi)
)) +
    geom_point() +
    geom_pointrange() +
    xlim(0, 1) +
    ylab("") +
    xlab("Predicted Probabilities") +
    theme_bw() +
    theme(
        legend.title = element_blank(),
        legend.position = "bottom"
    )

ggsave(
    plot = last_plot(),
    path = paste0(path, "/Figures/"),
    filename = "figure5.png",
    scale = 0.75,
    dpi = 700,
    width = 7,
    height = 6
)

ggsave(
  plot = last_plot(),
  path = paste0(path, "/Figures/"),
  filename = "figure5.eps",
  scale = 0.75,
  dpi = 700,
  width = 7,
  height = 6
)

# changing up the reference category in model 2

M2_ref1 <- glmer(formula=update(formula(M2), ~ . - region_Visser + relevel(region_Visser, ref=1)), data=pea_dat, 
                 family=binomial(link="logit"), nAGQ=0L)

M2_ref2 <- glmer(formula=update(formula(M2), ~ . - region_Visser + relevel(region_Visser, ref=2)), data=pea_dat, 
                 family=binomial(link="logit"), nAGQ=0L)

M2_ref3 <- glmer(formula=update(formula(M2), ~ . - region_Visser + relevel(region_Visser, ref=3)), data=pea_dat, 
                 family=binomial(link="logit"), nAGQ=0L)

M2_ref4 <- glmer(formula=update(formula(M2), ~ . - region_Visser + relevel(region_Visser, ref=4)), data=pea_dat, 
                 family=binomial(link="logit"), nAGQ=0L)

M2_ref5 <- glmer(formula=update(formula(M2), ~ . - region_Visser + relevel(region_Visser, ref=5)), data=pea_dat, 
                 family=binomial(link="logit"), nAGQ=0L)


htmlreg(
  l = list(M2_ref1, M2_ref2, M2_ref3,
           M2_ref4, M2_ref5), doctype = FALSE, center = TRUE, siunitx = TRUE,
  custom.model.names = c("Ref: Corporatism (North)", "Ref: Social-partnership (West)",
                         "Ref: State-centred (South)", "Ref: Liberal", "Ref: CEE regimes"),
  custom.gof.rows = list(
    "All other vars from table 4" = rep("Yes", 5)),
  single.row = TRUE, no.margin = TRUE,
  caption = "",
  include.variance = FALSE,
  include.aic = FALSE,
  include.bic = FALSE,
  omit.coef = "(Intercept)",
  custom.coef.map = list(
    "relevel(region_Visser, ref = 1)1" = "Corporatism (North)",
    "relevel(region_Visser, ref = 2)1" = "Corporatism (North)",
    "relevel(region_Visser, ref = 3)1" = "Corporatism (North)",
    "relevel(region_Visser, ref = 4)1" = "Corporatism (North)",
    "relevel(region_Visser, ref = 5)1" = "Corporatism (North)",
    "relevel(region_Visser, ref = 1)2" = "Social-partnership (West)",
    "relevel(region_Visser, ref = 2)2" = "Social-partnership (West)",
    "relevel(region_Visser, ref = 3)2" = "Social-partnership (West)",
    "relevel(region_Visser, ref = 4)2" = "Social-partnership (West)",
    "relevel(region_Visser, ref = 5)2" = "Social-partnership (West)",
    "relevel(region_Visser, ref = 1)3" = "State-centred (South)",
    "relevel(region_Visser, ref = 2)3" = "State-centred (South)",
    "relevel(region_Visser, ref = 3)3" = "State-centred (South)",
    "relevel(region_Visser, ref = 4)3" = "State-centred (South)",
    "relevel(region_Visser, ref = 5)3" = "State-centred (South)",
    "relevel(region_Visser, ref = 1)4" = "Liberal",
    "relevel(region_Visser, ref = 2)4" = "Liberal",
    "relevel(region_Visser, ref = 3)4" = "Liberal",
    "relevel(region_Visser, ref = 4)4" = "Liberal",
    "relevel(region_Visser, ref = 5)4" = "Liberal",
    "relevel(region_Visser, ref = 1)5" = "CEE regimes",
    "relevel(region_Visser, ref = 2)5" = "CEE regimes",
    "relevel(region_Visser, ref = 3)5" = "CEE regimes",
    "relevel(region_Visser, ref = 4)5" = "CEE regimes",
    "relevel(region_Visser, ref = 5)5" = "CEE regimes"
  ),
  include.deviance = FALSE,
  include.loglik = FALSE,
  file = paste0(path, "/Appendix/ref_values_table.html"),
)


# appendix, split sample, robustness check 1

M1_pre_2008 <- glmer(formula = update(formula(M1), ~ . - crisis), data = subset(pea_dat, pre_crisis == 1), family = binomial(link = "logit"), nAGQ = 0L)
M1_post_2008 <- glmer(formula = update(formula(M1), ~ . - crisis), data = subset(pea_dat, pre_crisis == 0), family = binomial(link = "logit"), nAGQ = 0L)

M2_pre_2008 <- glmer(formula = update(formula(M2), ~ . - crisis), data = subset(pea_dat, pre_crisis == 1), family = binomial(link = "logit"), nAGQ = 0L)
M2_post_2008 <- glmer(formula = update(formula(M2), ~ . - crisis), data = subset(pea_dat, pre_crisis == 0), family = binomial(link = "logit"), nAGQ = 0L)


clean_model <- function(model_obj) {
    # Extract the model
    model_obj <- extract(model_obj)

    # Exponentiate coefficients
    model_obj@coef <- exp(model_obj@coef)

    # Set standard errors to numeric (you can set them to any value you want)
    model_obj@se <- as.numeric(rep(NA, length(model_obj@se)))

    # Modify gof.names and gof.decimal
    model_obj@gof.names <- as.character(c(
        "", "", "", "Num. obs.", "Num. groups: country*year",
        "Num. groups: countries", "", ""
    ))
    model_obj@gof.decimal <- c(FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE)

    # Set some elements of gof to NA
    model_obj@gof[1:3] <- as.numeric(rep(NA, 3))
    model_obj@gof[7:8] <- as.numeric(rep(NA, 2))

    return(model_obj)
}

htmlreg(
    l = list(
        clean_model(M1_pre_2008), clean_model(M1_post_2008),
        clean_model(M2_pre_2008), clean_model(M2_post_2008)
    ), doctype = FALSE, center = TRUE, siunitx = TRUE,
    custom.model.names = c("DV1: 2000-2007", "DV1: 2008-2015", "DV2: 2000-2007", "DV2: 2008-2015"),
    single.row = TRUE, no.margin = TRUE,
    caption = "",
    include.variance = FALSE,
    include.aic = FALSE,
    include.bic = FALSE,
    omit.coef = "(Intercept)",
    custom.coef.map = list(
        region_Visser2 = "Social-partnership (West)",
        region_Visser3 = "State-centred (South)",
        region_Visser4 = "Liberal",
        region_Visser5 = "CEE regimes",
        econ_private1 = "Private economic (0,1)",
        econ_public1 = "Public economic (0,1)",
        political1 = "Political (0,1)",
        cultural1 = "Cultural (0,1)",
        demo1 = "Demonstration (0,1)",
        strike1 = "Strike (0,1)",
        socialgroups_occup1 = "Occupational social group (0,1)",
        other_SG1 = "Other social group (0,1)",
        parties1 = "Parties (0,1)",
        part_num = "Participation rate",
        cab_leftright = "Cabinet left-right",
        unemployment_q = "Unemployment (quarterly)"
        # gdp_per_capita = "GDP per capita"
    ),
    groups = list(
        "Region (Ref: Corporatist: North)" = 1:4,
        "Protest Issues" = 5:8,
        "Action Forms" = 9:10,
        "Protest Actors" = 11:14,
        "Political & economic context" = 15:16
    ),
    include.deviance = FALSE,
    include.loglik = FALSE,
    file = paste0(path, "/Appendix/table4_split_sample.html"),
)

# split sample figure 2

newdat <- expand.grid(
    private_public = getmode(subset(pea_dat, !is.na(private_public))$private_public),
    socialgroups_occup = getmode(subset(pea_dat, !is.na(private_public))$socialgroups_occup),
    other_SG = getmode(subset(pea_dat, !is.na(private_public))$other_SG),
    parties = getmode(subset(pea_dat, !is.na(private_public))$parties),
    part_num = mean(subset(pea_dat, !is.na(private_public))$part_num, na.rm = TRUE),
    region_Visser = as.factor(seq(
        facmin(subset(pea_dat, !is.na(private_public))$region_Visser),
        facmax(subset(pea_dat, !is.na(private_public))$region_Visser), 1
    )),
    cab_leftright = mean(subset(pea_dat, !is.na(private_public))$cab_leftright, na.rm = TRUE),
    # gdp_per_capita = mean(subset(pea_dat, !is.na(private_public))$gdp_per_capita, na.rm = TRUE),
    unemployment_q = mean(subset(pea_dat, !is.na(private_public))$unemployment_q, na.rm = TRUE)
)

mm <- model.matrix(terms(M2_pre_2008), newdat)
newdat$private_public <- predict(M2_pre_2008, newdat, re.form = NA)
pvar1 <- diag(mm %*% tcrossprod(vcov(M2_pre_2008), mm))
cmult <- qnorm(0.975)

## lower and upper CI
newdat <- data.frame(
    newdat,
    plo = newdat$private_public - cmult * sqrt(pvar1),
    phi = newdat$private_public + cmult * sqrt(pvar1)
)

mar_dat_pre <- newdat %>%
    mutate(labels = case_when(
        region_Visser == 1 ~ "Corporatism (North)",
        region_Visser == 2 ~ "Social-partnership (West)",
        region_Visser == 3 ~ "State-centered (South)",
        region_Visser == 4 ~ "Liberal",
        region_Visser == 5 ~ "CEE regimes"
    )) %>%
    mutate(labels = factor(labels, levels = c(
        "CEE regimes",
        "Liberal",
        "State-centered (South)",
        "Social-partnership (West)",
        "Corporatism (North)"
    ))) %>%
    select(region_Visser, private_public, plo, phi, labels) %>%
    mutate(type = "Pre-crisis period (< Sep. 2008)")


newdat <- expand.grid(
    private_public = getmode(subset(pea_dat, !is.na(private_public))$private_public),
    socialgroups_occup = getmode(subset(pea_dat, !is.na(private_public))$socialgroups_occup),
    other_SG = getmode(subset(pea_dat, !is.na(private_public))$other_SG),
    parties = getmode(subset(pea_dat, !is.na(private_public))$parties),
    part_num = mean(subset(pea_dat, !is.na(private_public))$part_num, na.rm = TRUE),
    region_Visser = as.factor(seq(
        facmin(subset(pea_dat, !is.na(private_public))$region_Visser),
        facmax(subset(pea_dat, !is.na(private_public))$region_Visser), 1
    )),
    cab_leftright = mean(subset(pea_dat, !is.na(private_public))$cab_leftright, na.rm = TRUE),
    # gdp_per_capita = mean(subset(pea_dat, !is.na(private_public))$gdp_per_capita, na.rm = TRUE),
    unemployment_q = mean(subset(pea_dat, !is.na(private_public))$unemployment_q, na.rm = TRUE)
)

mm <- model.matrix(terms(M2_post_2008), newdat)
newdat$private_public <- predict(M2_post_2008, newdat, re.form = NA)
pvar1 <- diag(mm %*% tcrossprod(vcov(M2_post_2008), mm))
cmult <- qnorm(0.975)

## lower and upper CI
newdat <- data.frame(
    newdat,
    plo = newdat$private_public - cmult * sqrt(pvar1),
    phi = newdat$private_public + cmult * sqrt(pvar1)
)

mar_dat_post <- newdat %>%
    mutate(labels = case_when(
        region_Visser == 1 ~ "Corporatism (North)",
        region_Visser == 2 ~ "Social-partnership (West)",
        region_Visser == 3 ~ "State-centered (South)",
        region_Visser == 4 ~ "Liberal",
        region_Visser == 5 ~ "CEE regimes"
    )) %>%
    mutate(labels = factor(labels, levels = c(
        "CEE regimes",
        "Liberal",
        "State-centered (South)",
        "Social-partnership (West)",
        "Corporatism (North)"
    ))) %>%
    select(region_Visser, private_public, plo, phi, labels) %>%
    mutate(type = "Crisis period (> Sep. 2008)")

mar_dat <- bind_rows(mar_dat_pre, mar_dat_post)


GeomPointrange$draw_key <- function(data, params, size) {
    draw_key_vpath <- function(data, params, size) {
        # only need to change the x&y coords so that the line is horizontal
        # originally, the vertical line was `0.5, 0.1, 0.5, 0.9`
        segmentsGrob(0.1, 0.5, 0.9, 0.5,
            gp = gpar(
                col = alpha(data$colour, data$alpha),
                lwd = data$size * .pt, lty = data$linetype,
                lineend = "butt"
            ), arrow = params$arrow
        )
    }

    grid::grobTree(
        draw_key_vpath(data, params, size),
        draw_key_point(transform(data, size = data$size * 4), params)
    )
}

ggplot(data = mar_dat, aes(
    x = plogis(private_public), y = as_factor(`labels`),
    xmin = plogis(plo), xmax = plogis(phi),
    group = type, color = type
)) +
    geom_pointrange(aes(shape = type),
        position = position_dodge2(width = 0.5, padding = 0.5)
    ) +
    xlim(0, 1) +
    ylab("") +
    xlab("Predicted Probabilities") +
    guides(
        colour = guide_legend(reverse = T),
        shape = guide_legend(reverse = T)
    ) +
    theme_bw() +
    theme(
        legend.title = element_blank(),
        legend.position = "bottom"
    )

ggsave(
    plot = last_plot(),
    path = paste0(path, "/Appendix/"),
    filename = "figure5_split_r.png",
    scale = 0.87,
    dpi = 400,
    width = 7,
    height = 6
)

# robustness check 2 with institutional control variables

# DV1

eff_sample <- pea_dat %>%
    select(
        unions, Level_new, Tripartite_Council, RI, Eff_Number_Unions,
        Union_Density, Union_Density_sq, econ_private, econ_public,
        political, cultural, demo, strike, socialgroups_occup, other_SG,
        parties, part_num, region_Visser, crisis, unemployment_q,
        cab_leftright, gdp_per_capita, iso2code, str_label
    ) %>%
    na.omit()

M1_rob <- glmer(unions ~ Level_new + Tripartite_Council + RI + Eff_Number_Unions + Union_Density +
    1 + (1 | iso2code) + (1 | str_label), data = eff_sample, family = binomial(link = "logit"), nAGQ = 0L)

M2_rob <- glmer(update(formula(M1_rob), ~ . + Union_Density_sq),
    data = eff_sample, family = binomial(link = "logit"), nAGQ = 0L
)
M3_rob <- glmer(update(formula(M1_rob), ~ . + econ_private + econ_public + political + cultural +
    demo + strike + socialgroups_occup + other_SG + parties +
    part_num), data = eff_sample, family = binomial(link = "logit"), nAGQ = 0L)

M4_rob <- glmer(update(formula(M3_rob), ~ . + region_Visser),
    data = eff_sample, family = binomial(link = "logit"), nAGQ = 0L
)

M5_rob <- glmer(update(formula(M4_rob), ~ . + crisis + unemployment_q + cab_leftright + gdp_per_capita),
    data = eff_sample, family = binomial(link = "logit"), nAGQ = 0L
)

pea_dat2 <- pea_dat %>% 
  filter(!is.na(refugee_countries)) %>%
  select(
    unions, econ_private, econ_public,
    political, cultural, demo, strike, socialgroups_occup, other_SG,
    parties, part_num, region_Visser, crisis, unemployment_q,
    cab_leftright, gdp_per_capita, iso2code, str_label, covid_cases_q, 
    covid_deaths_q, refugee_arrivals_q, iso2c) %>% 
  na.omit()

M6_rob <- glmer(unions ~ econ_private + econ_public + political + 
  cultural + demo + strike + socialgroups_occup + other_SG + 
  parties + part_num + region_Visser + crisis + unemployment_q + cab_leftright + 
  gdp_per_capita + covid_cases_q + refugee_arrivals_q + 
  (1 | iso2code) + (1 | str_label), data = pea_dat2,
family = binomial(link = "logit"), nAGQ = 0L)

htmlreg(
    l = list(
        clean_model(M1_rob), clean_model(M2_rob),
        clean_model(M3_rob), clean_model(M4_rob), clean_model(M5_rob), 
        clean_model(M6_rob)
    ), doctype = FALSE, center = TRUE, siunitx = TRUE,
    single.row = TRUE, no.margin = TRUE,
    caption = "",
    include.variance = FALSE,
    include.aic = FALSE,
    include.bic = FALSE,
    omit.coef = "(Intercept)",
    custom.coef.map = list(
        region_Visser2 = "Social-partnership (West)",
        region_Visser3 = "State-centred (South)",
        region_Visser4 = "Liberal",
        region_Visser5 = "CEE regimes",
        Level_new2 = "Intermediate central & industry",
        Level_new3 = "Sector/industry level",
        Level_new4 = "Intermediate industry & company",
        Level_new5 = "Company level",
        Tripartite_Council1 = "Council with different actors",
        Tripartite_Council2 = "Classical tripartite council",
        RI1 = "Partial concertation",
        RI2 = "Full concertation",
        Eff_Number_Unions = "Effective number of unions",
        Union_Density = "Union density",
        Union_Density_sq = "Union density squared",
        econ_private1 = "Private economic (0,1)",
        econ_public1 = "Public economic (0,1)",
        political1 = "Political (0,1)",
        cultural1 = "Cultural (0,1)",
        demo1 = "Demonstration (0,1)",
        strike1 = "Strike (0,1)",
        socialgroups_occup1 = "Occupational social group (0,1)",
        other_SG1 = "Other social group (0,1)",
        parties1 = "Parties (0,1)",
        part_num = "Participation rate",
        crisis1 = "Shock period",
        crisis2 = "Euro crisis",
        crisis3 = "Post-crises years (after 2015)",
        cab_leftright = "Cabinet left-right",
        unemployment_q = "Unemployment (quarterly)",
        gdp_per_capita = "GDP per capita",
        covid_cases_q = "Covid cases (quarterly)",
        refugee_arrivals_q = "Refugee arrivals (quarterly)"
    ),
    groups = list(
        "Region (Ref: Corporatist: North)" = 1:4,
        "Bargaining Level (Ref: Central)" = 5:8,
        "Tripartite Council (Ref: None)" = 9:10,
        "Routine Involvement (Ref: None)" = 11:12,
        "Union number & density" = 13:15,
        "Protest Issues" = 16:19,
        "Action Forms" = 20:21,
        "Protest Actors" = 22:24,
        "Crisis (Ref: Normal Times)" = 26:28,
        "Political & economic context" = 29:33
    ),
    include.deviance = FALSE,
    include.loglik = FALSE,
    file = paste0(path, "/Appendix/table5_inst_var_DV1.html"),
)

# DV2

eff_sample <- pea_dat %>%
    select(
        private_public, Level_new, Tripartite_Council, RI, Eff_Number_Unions,
        Union_Density, Union_Density_sq,
        socialgroups_occup, other_SG,
        parties, part_num, region_Visser, crisis, unemployment_q,
        cab_leftright, iso2code, str_label, gdp_per_capita
    ) %>%
    na.omit()

# Amelia::missmap(eff_sample)

M1_rob <- glmer(private_public ~ Level_new + Tripartite_Council + RI + Eff_Number_Unions + Union_Density +
    1 + (1 | iso2code) + (1 | str_label), data = eff_sample, family = binomial(link = "logit"), nAGQ = 0L)

M2_rob <- glmer(update(formula(M1_rob), ~ . + Union_Density_sq),
    data = eff_sample, family = binomial(link = "logit"), nAGQ = 0L
)
M3_rob <- glmer(update(formula(M1_rob), ~ . + socialgroups_occup + other_SG + parties +
    part_num), data = eff_sample, family = binomial(link = "logit"), nAGQ = 0L)

M4_rob <- glmer(update(formula(M3_rob), ~ . + region_Visser),
    data = eff_sample, family = binomial(link = "logit"), nAGQ = 0L
)

M5_rob <- glmer(update(formula(M4_rob), ~ . + crisis + unemployment_q + cab_leftright + gdp_per_capita),
    data = eff_sample, family = binomial(link = "logit"), nAGQ = 0L
)

M6_rob <- glmer(update(formula(M5_rob), ~ . - Union_Density),
    data = eff_sample, family = binomial(link = "logit"), nAGQ = 0L
)

pea_dat2 <- pea_dat %>% 
  filter(!is.na(refugee_countries)) %>%
  select(
    private_public, socialgroups_occup, other_SG, parties, part_num, region_Visser, crisis, 
      unemployment_q, cab_leftright, gdp_per_capita, iso2code, str_label, covid_cases_q, 
     refugee_arrivals_q, iso2c) %>% 
  na.omit()

M7_rob <- glmer(private_public ~ socialgroups_occup + 
                  other_SG + parties + part_num + region_Visser + crisis + 
                  unemployment_q + cab_leftright + gdp_per_capita + covid_cases_q + refugee_arrivals_q +
                  (1 | iso2code) + (1 | str_label), data = pea_dat2,
                family = binomial(link = "logit"), nAGQ = 0L)



htmlreg(
    l = list(
        clean_model(M1_rob), clean_model(M2_rob), clean_model(M3_rob),
        clean_model(M4_rob), clean_model(M5_rob), clean_model(M7_rob)
    ), doctype = FALSE, center = TRUE, siunitx = TRUE,
    single.row = TRUE, no.margin = TRUE,
    caption = "",
    include.variance = FALSE,
    include.aic = FALSE,
    include.bic = FALSE,
    omit.coef = "(Intercept)",
    custom.coef.map = list(
        region_Visser2 = "Social-partnership (West)",
        region_Visser3 = "State-centred (South)",
        region_Visser4 = "Liberal",
        region_Visser5 = "CEE regimes",
        Level_new2 = "Intermediate central & industry",
        Level_new3 = "Sector/industry level",
        Level_new4 = "Intermediate industry & company",
        Level_new5 = "Company level",
        Tripartite_Council1 = "Council with different actors",
        Tripartite_Council2 = "Classical tripartite council",
        RI1 = "Partial concertation",
        RI2 = "Full concertation",
        Eff_Number_Unions = "Effective number of unions",
        Union_Density = "Union density",
        Union_Density_sq = "Union density squared",
        econ_private1 = "Private economic (0,1)",
        econ_public1 = "Public economic (0,1)",
        political1 = "Political (0,1)",
        cultural1 = "Cultural (0,1)",
        demo1 = "Demonstration (0,1)",
        strike1 = "Strike (0,1)",
        socialgroups_occup1 = "Occupational social group (0,1)",
        other_SG1 = "Other social group (0,1)",
        parties1 = "Parties (0,1)",
        part_num = "Participation rate",
        crisis1 = "Shock period",
        crisis2 = "Euro crisis",
        crisis3 = "Post-crises years (after 2015)",
        cab_leftright = "Cabinet left-right",
        unemployment_q = "Unemployment (quarterly)",
        gdp_per_capita = "GDP per capita",
        covid_cases_q = "Covid cases (quarterly)",
        refugee_arrivals_q = "Refugee arrivals (quarterly)"
    ),
    groups = list(
        "Region (Ref: Corporatist: North)" = 1:4,
        "Bargaining Level (Ref: Central)" = 5:8,
        "Tripartite Council (Ref: None)" = 9:10,
        "Routine Involvement (Ref: None)" = 11:12,
        "Union number & density" = 13:15,
        "Protest Actors" = 16:18,
        "Crisis (Ref: Normal Times)" = 20:22,
        "Political & economic context" = 23:27
    ),
    include.deviance = FALSE,
    include.loglik = FALSE,
    file = paste0(path, "/Appendix/table5_inst_var_DV2.html"),
)

# Two way fixed effects model

pea_dat2 <- pea_dat

pea_dat2$iso2c <- as.factor(pea_dat2$iso2c)
pea_dat2$year <- as.factor(pea_dat2$year)

M1_fe <- glm(
    unions ~ econ_private + econ_public + political + cultural +
        demo + strike + socialgroups_occup + other_SG + parties +
        part_num + region_Visser + crisis + unemployment_q + cab_leftright +
        year,
    data = pea_dat2, family = binomial(link = "logit")
)

M2_fe <- glm(
    private_public ~ socialgroups_occup + other_SG + parties + part_num +
        crisis + region_Visser + cab_leftright + unemployment_q +
        year,
    data = pea_dat2, family = binomial(link = "logit")
)


M1_rob <- coeftest(M1_fe, vcovCL(M1_fe, cluster = list(pea_dat2$year, pea_dat2$region_Visser)))

M2_rob <- coeftest(M2_fe, vcovCL(M2_fe, cluster = list(
    pea_dat2$year[!is.na(pea_dat2$private_public)],
    pea_dat2$region_Visser[!is.na(pea_dat2$private_public)]
)))

htmlreg(
    l = list(M1_rob, M2_rob), doctype = FALSE, center = TRUE, siunitx = TRUE,
    custom.model.names = c("Model 1 Union Sponsorship of Protest (=1)", "Model 2 Movement Character of Union Protest (=1)"),
    single.row = TRUE, no.margin = TRUE,
    caption = "",
    omit.coef = "\\b(Intercept|year|iso2c\\(\\w+\\))\\b",
    custom.gof.rows = list(
        "Country fixed effects" = rep("Yes", 2),
        "Year fixed effects" = rep("Yes", 2),
        "Adjusted R sq" = c(NagelkerkeR2(M1_fe)$R2, NagelkerkeR2(M2_fe)$R2),
        "Number of observations" = c(nobs(M1_fe), nobs(M2_fe))
    ),
    include.aic = FALSE,
    include.bic = FALSE,
    custom.coef.map = list(
        region_Visser2 = "Social-partnership (West)",
        region_Visser3 = "State-centred (South)",
        region_Visser4 = "Liberal",
        region_Visser5 = "CEE regimes",
        econ_private1 = "Private economic (0,1)",
        econ_public1 = "Public economic (0,1)",
        political1 = "Political (0,1)",
        cultural1 = "Cultural (0,1)",
        demo1 = "Demonstration (0,1)",
        strike1 = "Strike (0,1)",
        socialgroups_occup1 = "Occupational social group (0,1)",
        other_SG1 = "Other social group (0,1)",
        parties1 = "Parties (0,1)",
        part_num = "Participation rate",
        crisis1 = "Shock period",
        crisis2 = "Euro crisis",
        crisis3 = "Post-crises years (after 2015)",
        cab_leftright = "Cabinet left-right",
        unemployment_q = "Unemployment (quarterly)"
        # gdp_per_capita = "GDP per capita"
    ),
    groups = list(
        "Region (Ref: Corporatist: North)" = 1:4,
        "Protest Issues" = 5:8,
        "Action Forms" = 9:10,
        "Protest Actors" = 11:13,
        "Crisis (Ref: Normal Times)" = 15:17,
        "Political & economic context" = 18:19
    ),
    include.deviance = FALSE,
    include.loglik = FALSE,
    file = paste0(path, "/Appendix/table4_fixed_effects.html"),
)

# marginal effects figure for region

newdata <- pea_dat2 %>%
    select(
        econ_private, econ_public, political, cultural, demo,
        strike, socialgroups_occup, other_SG, parties, part_num, crisis,
        cab_leftright, unemployment_q, year, private_public
    ) %>% 
  mutate_if(is.factor, ~find_single_mode(.)) %>% 
  mutate_if(is.numeric, ~mean(., na.rm=TRUE)) %>% 
  mutate(region_Visser = as.factor(rep(c(1, 2, 3, 4, 5), length.out=nrow(pea_dat2)))) %>% 
  distinct()

m1_predictions <- predict(M1_fe, type = "response", se.fit = TRUE, newdata = newdata, 
                       vcov = vcovCL(M1_fe, cluster = list(pea_dat2$year,
                                                           pea_dat2$region_Visser)))

m1_predictions$x <- attr(m1_predictions$fit, "names")

m1_predictions <- as_tibble(m1_predictions) %>% 
  mutate(type = "Union Sponsorship of Protest") %>% 
  mutate(conf.low = fit - 1.96 * se.fit, 
         conf.high = fit + 1.96 * se.fit)

newdata <- pea_dat2 %>%
  filter(!is.na(private_public)) %>% 
  select(socialgroups_occup, other_SG, parties, part_num, 
         crisis, region_Visser, cab_leftright, unemployment_q,
         year) %>% 
  mutate_if(is.factor, ~find_single_mode(.)) %>% 
  mutate_if(is.numeric, ~mean(., na.rm=TRUE)) %>% 
  mutate(region_Visser = as.factor(rep(c(1, 2, 3, 4, 5), 
                                       length.out=nrow(pea_dat2[!is.na(pea_dat2$private_public),])))) %>% 
  distinct()

m2_predictions <- predict(M2_fe, type = "response", se.fit = TRUE, newdata = newdata, 
                          vcov = vcovCL(M2_fe, 
                                        cluster = list(pea_dat2$year[!is.na(pea_dat2$private_public)],
                                                       pea_dat2$region_Visser[!is.na(pea_dat2$private_public)])))

m2_predictions$x <- attr(m2_predictions$fit, "names")

m2_predictions <- as_tibble(m2_predictions) %>% 
  mutate(type = "Movement Character of Union Protest") %>% 
  mutate(conf.low = fit - 1.96 * se.fit, 
         conf.high = fit + 1.96 * se.fit)


mar_dat <- bind_rows(m1_predictions, m2_predictions) %>%
    mutate(labels = case_when(
      x == 1 ~ "Corporatism (North)",
      x == 2 ~ "Social-partnership (West)",
      x == 3 ~ "State-centered (South)",
      x == 4 ~ "Liberal",
      x == 5 ~ "CEE regimes"
    )) %>%
    mutate(labels = factor(labels, levels = c(
        "CEE regimes",
        "Liberal",
        "State-centered (South)",
        "Social-partnership (West)",
        "Corporatism (North)"
    ))) %>%
    mutate(type = factor(type, levels = c(
        "Union Sponsorship of Protest",
        "Movement Character of Union Protest"
    )))

ggplot(data = mar_dat, aes(
    x = fit, y = labels,
    xmin = conf.low, xmax = conf.high
)) +
    facet_wrap(~type, ncol = 2, scale = "free_x") +
    geom_point() +
    geom_pointrange() +
    ylab("") +
    xlab("Predicted Probabilities") +
    theme_bw() +
    theme(
        legend.title = element_blank(),
        legend.position = "bottom"
    )

ggsave(
    plot = last_plot(),
    path = paste0(path, "/Appendix/"),
    filename = "figure5_2wayfe.png",
    dpi = 400,
    width = 7,
    height = 4
)
